MACHINE LEARNING WITH PYTHON BCA P 311
Page 1
Aditya Rawat
01290302021
PROGRAM 1
OBJECTIVE: Write a program to implement Logistic Regression.
CODE:
# import libraries and dataset
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

# Load the insurance data (assumes a two-column CSV: feature, label -- TODO confirm).
df = pd.read_csv("insurance_data.csv")

# Data preprocessing: inspect missing values, shape and summary statistics.
df.isnull().sum()
df.shape
df.describe()

# Split the data: first column is the feature, second column the label.
x = df.iloc[:, :1]   # feature matrix must stay 2-D for sklearn
y = df.iloc[:, 1]    # target as a 1-D Series; the original `df.iloc[:, 1:]`
                     # passed a 2-D DataFrame to fit(), triggering sklearn's
                     # DataConversionWarning / implicit ravel
from sklearn.model_selection import train_test_split
x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.4, random_state=23)

# Train the model
from sklearn.linear_model import LogisticRegression
model = LogisticRegression()
model.fit(x_train, y_train)

# Model prediction
y_pred = model.predict(x_test)

# Finding evaluation metrics
from sklearn.metrics import confusion_matrix
import matplotlib.pyplot as plt  # duplicate of the import above; harmless
MACHINE LEARNING WITH PYTHON BCA P 311
Page 2
Aditya Rawat
01290302021
import seaborn as sns
# Confusion matrix of the logistic-regression predictions (rows = true, cols = predicted).
cf=confusion_matrix(y_test,y_pred)
plt.figure()
sns.heatmap(cf,annot=True)
plt.xlabel('Prediction')
plt.ylabel('Target')
plt.title('Confusion matrix')
# Overall accuracy as a percentage.
from sklearn.metrics import accuracy_score
accuracy= accuracy_score(y_test,y_pred)
print("accuracy:{:.2f}%".format(accuracy*100))
# Precision, recall and F1 for the positive class (sklearn default average).
from sklearn.metrics import precision_score,recall_score,f1_score
print("Precision score:{:.2f}%".format(precision_score(y_test,y_pred)*100))
print("Recall Score::{:.2f}%".format(recall_score(y_test,y_pred)*100))
print("f1 score:{:.2f}%".format(f1_score(y_test,y_pred)*100))
MACHINE LEARNING WITH PYTHON BCA P 311
Page 3
Aditya Rawat
01290302021
OUTPUT:
MACHINE LEARNING WITH PYTHON BCA P 311
Page 4
Aditya Rawat
01290302021
MACHINE LEARNING WITH PYTHON BCA P 311
Page 5
Aditya Rawat
01290302021
PROGRAM 2
OBJECTIVE: Write a program to implement Linear Regression with one variable.
CODE:
# Import the libraries used throughout this program.
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error
import matplotlib.pyplot as plt

# Build a small in-memory dataset: salary grows linearly with experience
# (40000 at 1 year, +5000 per additional year).
years = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10]
salaries = [40000 + 5000 * (yr - 1) for yr in years]
df = pd.DataFrame({'Years_of_Experience': years, 'Salary': salaries})

# Prepare the 2-D feature matrix and the 1-D target vector.
X = df[['Years_of_Experience']].to_numpy()
y = df['Salary'].to_numpy()
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Fit an ordinary least-squares regression line.
model = LinearRegression()
model.fit(X_train, y_train)

# Predict salaries for the held-out samples.
y_pred = model.predict(X_test)

# Report the mean squared error on the test split.
mse = mean_squared_error(y_test, y_pred)
print(f"Mean Squared Error: {mse}")
MACHINE LEARNING WITH PYTHON BCA P 311
Page 6
Aditya Rawat
01290302021
# Visualize the fitted line against the raw data points.
plt.scatter(X, y, color='blue')             # observed samples
plt.plot(X, model.predict(X), color='red')  # fitted regression line
plt.xlabel('Years of Experience')
plt.ylabel('Salary')
plt.title('Years of Experience vs. Salary')
plt.show()
MACHINE LEARNING WITH PYTHON BCA P 311
Page 7
Aditya Rawat
01290302021
OUTPUT:
MACHINE LEARNING WITH PYTHON BCA P 311
Page 8
Aditya Rawat
01290302021
PROGRAM 3
OBJECTIVE: Write a program to implement Linear Regression with two variables.
CODE:
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd

# Load the Boston housing data and inspect it.
df = pd.read_csv("boston-housing-dataset.csv")
df.isnull().sum()
df.shape
df.describe()

# Divide the data into features x (first 13 columns) and target y (column 13).
x = df.iloc[:, 0:13]
x
y = df.iloc[:, 13]
y

# Split into train and test sets (fixed random_state for reproducibility).
# NOTE: in the extracted original the explanatory comments here spilled onto
# continuation lines without a leading '#', which is a syntax error.
from sklearn.model_selection import train_test_split
x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.10, random_state=40)

# Standardize: fit the scaler on the training data only, then apply the same
# transformation to the test data (avoids test-set leakage). The original
# called scaler.fit_transform(x_train) twice, discarding the first result.
from sklearn.preprocessing import StandardScaler
scaler = StandardScaler()
X_train = scaler.fit_transform(x_train)
X_test = scaler.transform(x_test)

from sklearn.linear_model import LinearRegression
# cross validation
from sklearn.model_selection import cross_val_score
# estimator
regression = LinearRegression()
MACHINE LEARNING WITH PYTHON BCA P 311
Page 9
Aditya Rawat
01290302021
regression.fit(X_train,y_train)

# 10-fold cross-validation MSE (negative by sklearn convention); the exact
# values vary between systems because the split depends on the random state.
cross_val_score(regression,X_train,y_train,scoring='neg_mean_squared_error',cv=10)
mse=cross_val_score(regression,X_train,y_train,scoring='neg_mean_squared_error',cv=10)
np.mean(mse)

# Prediction on the held-out test set.
reg_pred =regression.predict(X_test)
reg_pred

# Residual distribution: centred near zero indicates a reasonable fit.
import seaborn as sns
sns.displot(reg_pred-y_test)

# R^2 score. r2_score expects (y_true, y_pred) in that order; the original
# call passed (reg_pred, y_test), which is wrong because R^2 is not
# symmetric in its arguments and yields a different value.
from sklearn.metrics import r2_score
score = r2_score(y_test, reg_pred)
score
MACHINE LEARNING WITH PYTHON BCA P 311
Page 10
Aditya Rawat
01290302021
OUTPUT:
MACHINE LEARNING WITH PYTHON BCA P 311
Page 11
Aditya Rawat
01290302021
MACHINE LEARNING WITH PYTHON BCA P 311
Page 12
Aditya Rawat
01290302021
PROGRAM 4
OBJECTIVE: Write a program to implement the naïve Bayesian classifier for a sample training data
set stored as a .CSV file. Compute the accuracy of the classifier, considering few test data sets.
CODE:
import pandas as pd
from sklearn import tree
from sklearn.preprocessing import LabelEncoder
from sklearn.naive_bayes import GaussianNB

# Load the play-tennis dataset.
data = pd.read_csv('tennis.csv')
print("The first 5 values of data is:\n",data.head())

# Split into features (all but the last column) and target (last column).
X = data.iloc[:,:-1]
print("\nThe First 5 values of train data is\n",X.head())
y = data.iloc[:,-1]
print("\nThe first 5 values of Train outpu tis\n",y.head())

# Encode each categorical feature column as integers, one encoder per column.
for column in ['outlook', 'temp', 'humidity', 'windy']:
    X[column] = LabelEncoder().fit_transform(X[column])
print("\nNow the Train data is :\n",X.head())

# Encoder for the target labels (applied on the next page).
le_play = LabelEncoder()
MACHINE LEARNING WITH PYTHON BCA P 311
Page 13
Aditya Rawat
01290302021
# Encode the target labels as integers.
y = le_play.fit_transform(y)
print("\nNow the Train output is\n",y)

# Hold out 20% of the rows for testing (no random_state: split varies per run).
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test =train_test_split(X,y, test_size=0.20)

# Train a Gaussian naive Bayes classifier.
classifier = GaussianNB()
classifier.fit(X_train,y_train)

# Accuracy on the test split. accuracy_score's signature is
# (y_true, y_pred); the original call had the two arguments swapped --
# harmless for accuracy (which is symmetric) but misleading, and wrong if
# copied for precision/recall.
from sklearn.metrics import accuracy_score
print("Accuracy is:",accuracy_score(y_test, classifier.predict(X_test)))
MACHINE LEARNING WITH PYTHON BCA P 311
Page 14
Aditya Rawat
01290302021
OUTPUT:
MACHINE LEARNING WITH PYTHON BCA P 311
Page 15
Aditya Rawat
01290302021
PROGRAM 5
OBJECTIVE: Implement a K-Nearest Neighbors (KNN) classifier from scratch in Python. Use a
sample dataset, such as the Iris dataset, and split it into a training and testing set. Train the KNN
classifier on the training set and evaluate its performance on the testing set. Experiment with
different values of k and report the accuracy of the classifier.
CODE:
#importing libraries
import numpy as np
import matplotlib.pyplot as plt
from sklearn import datasets
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
# Model creation from scratch
def most_common(lst):
    """Return the most frequent element of lst (ties broken arbitrarily)."""
    return max(set(lst), key=lst.count)

def euclidean(point, data):
    """Euclidean distance between `point` and every row of `data` (vectorised)."""
    return np.sqrt(np.sum((point - data)**2, axis=1))

class KNeighborsClassifier:
    """A minimal k-nearest-neighbours classifier implemented from scratch."""

    def __init__(self, k=5, dist_metric=euclidean):
        # BUG FIX: the original read `def init`, a plain method, so
        # KNeighborsClassifier(k=...) could never construct an instance;
        # the constructor must be the dunder __init__.
        self.k = k
        self.dist_metric = dist_metric

    def fit(self, X_train, y_train):
        """Memorise the training data (lazy learner: no work at fit time)."""
        self.X_train = X_train
        self.y_train = y_train

    def predict(self, X_test):
        """Predict a label for each row of X_test by majority vote of the k nearest."""
        neighbors = []
        for x in X_test:
            distances = self.dist_metric(x, self.X_train)
            # Sort the training labels by distance and keep the k closest.
            y_sorted = [y for _, y in sorted(zip(distances, self.y_train))]
            neighbors.append(y_sorted[:self.k])
        return list(map(most_common, neighbors))

    def evaluate(self, X_test, y_test):
        """Return the accuracy of the classifier on (X_test, y_test)."""
        y_pred = self.predict(X_test)
        accuracy = sum(y_pred == y_test) / len(y_test)
        return accuracy
# Unpack the iris dataset.
iris = datasets.load_iris()
X = iris['data']     # 4 numeric features per sample
y = iris['target']   # integer class labels
# Split data into train & test sets (no random_state: split varies per run).
X_train, X_test, y_train, y_test =train_test_split(X, y, test_size=0.20)
# Preprocessing: standardise with statistics fitted on the training set only.
ss = StandardScaler().fit(X_train)
X_train, X_test = ss.transform(X_train),ss.transform(X_test)
# Test knn model across varying ks. Note that `knn` deliberately leaks out of
# the loop: the last fitted model (k=29) is reused below for the reports.
accuracies = []
ks = range(1, 30)
for k in ks:
    knn = KNeighborsClassifier(k=k)
    knn.fit(X_train, y_train)
    accuracy = knn.evaluate(X_test, y_test)
    accuracies.append(accuracy)
# outputs
MACHINE LEARNING WITH PYTHON BCA P 311
Page 17
Aditya Rawat
01290302021
# Report metrics for the most recently fitted model (the k=29 classifier
# from the loop above).
from sklearn.metrics import classification_report,confusion_matrix,accuracy_score
accuracy=accuracy_score(y_test,knn.predict(X_test))
print("Accuracy: {:.2f}%".format(accuracy * 100))
print("Confusion Matrix:\n",confusion_matrix(y_test,knn.predict(X_test)))
print("Classification Report:\n",classification_report(y_test,knn.predict(X_test)))
# Visualize accuracy vs. k
fig, ax = plt.subplots()
ax.plot(ks, accuracies)
ax.set(xlabel="k",
ylabel="Accuracy",
title="Performance of knn")
plt.show()
MACHINE LEARNING WITH PYTHON BCA P 311
Page 18
Aditya Rawat
01290302021
OUTPUT:
MACHINE LEARNING WITH PYTHON BCA P 311
Page 19
Aditya Rawat
01290302021
MACHINE LEARNING WITH PYTHON BCA P 311
Page 20
Aditya Rawat
01290302021
PROGRAM 6
OBJECTIVE: Write a Python program to implement a Support Vector Machine (SVM) classifier using
a library like scikit-learn. Choose a suitable dataset for binary classification (e.g., the Breast Cancer
dataset) and split it into training and testing sets. Train the SVM classifier on the training data and
evaluate its performance on the testing data, reporting metrics such as accuracy, precision, recall,
and F1-score. Experiment with different kernel functions (e.g., linear, radial basis function) and
compare their performance.
CODE:
# Import the libraries needed for SVM classification and its evaluation.
import numpy as np
import matplotlib.pyplot as plt
from sklearn import datasets
from sklearn.model_selection import train_test_split
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score,precision_score, recall_score, f1_score,confusion_matrix

# Load the breast-cancer dataset (binary classification target).
cancer = datasets.load_breast_cancer()
X, y = cancer.data, cancer.target

# Reserve 20% of the samples as a fixed, reproducible test set.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
# Model creation
def evaluate_classifier(clf, X_test, y_test):
    """Score a fitted classifier on the test split.

    Returns a tuple (accuracy, precision, recall, f1, confusion_matrix).
    """
    predictions = clf.predict(X_test)
    confusion_mat = confusion_matrix(y_test, predictions)
    return (
        accuracy_score(y_test, predictions),
        precision_score(y_test, predictions),
        recall_score(y_test, predictions),
        f1_score(y_test, predictions),
        confusion_mat,
    )
# Try SVM with different kernel functions and compare their scores.
kernel_functions = ['linear','rbf']
for kernel in kernel_functions:
    # Create and train an SVM classifier with the current kernel.
    svm_classifier = SVC(kernel=kernel)
    svm_classifier.fit(X_train, y_train)

    # Score the fitted model on the held-out data.
    accuracy, precision, recall, f1, confusion_mat = evaluate_classifier(svm_classifier, X_test, y_test)

    # Report the results for this kernel.
    print(f'\nResults for SVM with {kernel} kernel:')
    print(f'Accuracy: {accuracy:.4f}')
    print(f'Precision: {precision:.4f}')
    print(f'Recall: {recall:.4f}')
    print(f'F1-score: {f1:.4f}')
    print(f'Confusion Matrix:\n{confusion_mat}')
MACHINE LEARNING WITH PYTHON BCA P 311
Page 22
Aditya Rawat
01290302021
OUTPUT:
MACHINE LEARNING WITH PYTHON BCA P 311
Page 23
Aditya Rawat
01290302021
PROGRAM 7
OBJECTIVE: Given a dataset containing features and labels, implement a Random Forest
classification model using Python and a library like scikit-learn. Split the dataset into training and
testing sets, train the model, and evaluate its performance using metrics like accuracy, precision, and
recall.
CODE:
# Importing libraries
import numpy as np
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, precision_score, recall_score, confusion_matrix
from sklearn.datasets import load_iris

# Loading dataset. BUG FIX: the original read `iris.data = load_iris()`,
# which raises NameError because `iris` does not exist yet; the result of
# load_iris() must be bound to `iris` before .data / .target are read.
iris = load_iris()
X = iris.data
y = iris.target
X
y

# Splitting the dataset into training and testing sets
X_train, X_test, y_train, y_test =train_test_split(X, y, test_size=0.30,random_state=42)

# Creating a Random Forest Classifier (100 trees, fixed seed)
rf_classifier = RandomForestClassifier(n_estimators=100, random_state=42)

# Training the model
rf_classifier.fit(X_train, y_train)

# Making predictions on test set
y_pred = rf_classifier.predict(X_test)

# Evaluating the classifier
MACHINE LEARNING WITH PYTHON BCA P 311
Page 24
Aditya Rawat
01290302021
# Score the random-forest predictions against the held-out labels.
accuracy = accuracy_score(y_test, y_pred)
precision = precision_score(y_test, y_pred, average='weighted')
recall = recall_score(y_test, y_pred, average='weighted')
conf_mat = confusion_matrix(y_test, y_pred)

# Print the performance metrics.
for metric_name, metric_value in (('Accuracy', accuracy),
                                  ('Precision', precision),
                                  ('Recall', recall)):
    print(f'{metric_name}: {metric_value:.4f}')
print('Confusion Matrix:')
print(conf_mat)

# Render the confusion matrix as an image.
plt.imshow(conf_mat, interpolation='nearest', cmap=plt.cm.Blues)
plt.colorbar()
plt.title('Confusion Matrix')
plt.xlabel('Predicted Label')
plt.ylabel('True Label')
plt.show()
MACHINE LEARNING WITH PYTHON BCA P 311
Page 25
Aditya Rawat
01290302021
OUTPUT:
MACHINE LEARNING WITH PYTHON BCA P 311
Page 26
Aditya Rawat
01290302021
MACHINE LEARNING WITH PYTHON BCA P 311
Page 27
Aditya Rawat
01290302021
PROGRAM 8
OBJECTIVE: Design a Hebb Net to implement logical AND function.
CODE:
# Importing library.
import numpy as np

# Bipolar truth table for logical AND: +1 encodes true, -1 encodes false.
input_patterns = np.array([[1, 1], [1, -1], [-1, 1], [-1, -1]])
target_output = np.array([1, -1, -1, -1])

# Start from zero weights and zero bias.
weights = np.zeros(input_patterns.shape[1])
bias = 0
weights

# Train with the Hebb rule: w += x * t and b += t for every pattern.
for i, (pattern, target) in enumerate(zip(input_patterns, target_output)):
    weights += pattern * target
    bias += target
    print(f"weights{i}: ", weights, f"\nbias{i}: ", bias)

# Prediction: threshold the net input at zero.
# NOTE(review): training uses bipolar targets (+1/-1) but predict reports
# 0 for the negative class -- kept as-is to match the report's output.
def predict(input_pattern):
    return 1 if np.dot(input_pattern, weights) + bias >= 0 else 0

# Test the trained Hebb net on every input pattern.
for input_pattern in input_patterns:
    print(f"Input: {input_pattern}, Output: {predict(input_pattern)}")
MACHINE LEARNING WITH PYTHON BCA P 311
Page 28
Aditya Rawat
01290302021
OUTPUT:
MACHINE LEARNING WITH PYTHON BCA P 311
Page 29
Aditya Rawat
01290302021
PROGRAM 9
OBJECTIVE: Apply k-Means algorithm to cluster a set of data stored in a .CSV file. Use the same
data set for clustering using the Agglomerative Clustering algorithm. Compare the results of these
two algorithms and comment on the quality of clustering. You can add Python ML library classes in
the program.
CODE:
# Import libraries
import numpy as np
import pandas as pd
from sklearn.cluster import KMeans, AgglomerativeClustering
import matplotlib.pyplot as plt
from sklearn.metrics import silhouette_score
from sklearn.datasets import load_iris
# Loading dataset
iris = load_iris()
X = iris.data
# We are using only the first two features so the clusters can be plotted in 2D.
X = X[:, :2]
# No need to scale the features in this case
# Apply k-Means clustering (3 clusters, fixed seed for reproducibility)
kmeans = KMeans(n_clusters=3,random_state=42)
kmeans_labels = kmeans.fit_predict(X)
# Apply hierarchical (agglomerative) clustering for comparison
hierarchical =AgglomerativeClustering(n_clusters=3)
hierarchical_labels =hierarchical.fit_predict(X)
# Visualize the results side by side
plt.figure(figsize=(12, 5))
MACHINE LEARNING WITH PYTHON BCA P 311
Page 30
Aditya Rawat
01290302021
# Left panel: k-Means assignments over the two selected features.
plt.subplot(1, 2, 1)
plt.scatter(X[:, 0], X[:, 1], c=kmeans_labels,cmap='viridis', edgecolors='k', s=50)
plt.title('k-Means Clustering')
plt.xlabel('Feature 1')
plt.ylabel('Feature 2')
# Right panel: agglomerative clustering assignments on the same data.
plt.subplot(1, 2, 2)
plt.scatter(X[:, 0], X[:, 1],
c=hierarchical_labels, cmap='viridis',edgecolors='k', s=50)
plt.title('Hierarchical Clustering')
plt.xlabel('Feature 1')
plt.ylabel('Feature 2')
plt.tight_layout()
plt.show()
# Compare clustering quality using silhouette score (higher is better, max 1).
kmeans_silhouette = silhouette_score(X,kmeans_labels)
# Output
hierarchical_silhouette = silhouette_score(X,hierarchical_labels)
print(f'Silhouette Score - k-Means:{kmeans_silhouette:.4f}')
print(f'Silhouette Score - Hierarchical:{hierarchical_silhouette:.4f}')
MACHINE LEARNING WITH PYTHON BCA P 311
Page 31
Aditya Rawat
01290302021
OUTPUT:
MACHINE LEARNING WITH PYTHON BCA P 311
Page 32
Aditya Rawat
01290302021
MACHINE LEARNING WITH PYTHON BCA P 311
Page 33
Aditya Rawat
01290302021
PROGRAM 10
OBJECTIVE: Write a Python program to implement a Self-Organizing Map (SOM) and train it on a
given dataset, such as a collection of 2D points. Allow the user to specify parameters like the map
size, learning rate, and number of training iterations. Visualize the map before and after training to
observe how it adapts to the data.
CODE:
#numpy based SOM implimentation
!pip install minisom
#importing libraries
import numpy as np
from minisom import MiniSom
import matplotlib.pyplot as plt
#for visualization
def visualize_som(som, data, title):
plt.figure(figsize=(5, 5))
plt.pcolor(som.distance_map().T, cmap='bone_r') # plot the distance map as background
plt.colorbar()
#for visualization
def visualize_som(som, data, title):
plt.figure(figsize=(5, 5))
plt.pcolor(som.distance_map().T, cmap='bone_r') # plot the distance map as background
plt.colorbar()
# plot points on the map
for i (x, _) in enumerate(data):
w = som.winner(x)
plt.plot(w[0] + 0.5, w[1] + 0.5, 'o',markerfacecolor='None', markersize=10,markeredgecolor='r',
markeredgewidth=2)
plt.text(w[0] + 0.5, w[1] + 0.5, str(i + 1),color='k', fontweight='bold',ha='center', va='center')
MACHINE LEARNING WITH PYTHON BCA P 311
Page 34
Aditya Rawat
01290302021
plt.title("SOM")
plt.show()
# Generate synthetic 2D data
np.random.seed(42)
data = np.random.rand(100, 2)  # replace this with your own dataset

# User-defined parameters
map_size = (10, 10)      # SOM map size
learning_rate = 0.5      # initial learning rate
num_iterations = 1000    # number of training iterations

# Create the SOM and initialise its weights from the data.
som = MiniSom(*map_size, 2, sigma=1.0, learning_rate=learning_rate)
som.random_weights_init(data)

# Visualize the SOM before training. BUG FIX: the original called
# visualize_som(som, ...) *before* `som` was created, which raises
# NameError; the map must be constructed first and visualised with its
# untrained (random) weights.
visualize_som(som, data, title="SOM Before Training")

# Train the SOM.
print("Training SOM...")
som.train_random(data, num_iterations)
print("Training complete.")

# Visualize the SOM after training
visualize_som(som, data, title="SOM After Training")
MACHINE LEARNING WITH PYTHON BCA P 311
Page 35
Aditya Rawat
01290302021
OUTPUT:
MACHINE LEARNING WITH PYTHON BCA P 311
Page 36
Aditya Rawat
01290302021
MACHINE LEARNING WITH PYTHON BCA P 311
Page 37
Aditya Rawat
01290302021
MACHINE LEARNING WITH PYTHON BCA P 311
Page 38
Aditya Rawat
01290302021
PROGRAM 11
OBJECTIVE: (A) Take a binary classification dataset and implement both the K-Nearest Neighbors
(KNN) and Support Vector Machine (SVM) classifiers using Python. Compare the performance of
these two algorithms on metrics such as accuracy, precision, recall, and F1-score. Visualize the
decision boundaries for both algorithms.
CODE:
#installing mlxtend
pip install mlxtend
# Importing necessary libraries
import numpy as np
import matplotlib.pyplot as plt
from sklearn import datasets
from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsClassifier
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, confusion_matrix
from mlxtend.plotting import plot_decision_regions
# Loading Iris dataset (for binary classification)
iris = datasets.load_iris()
X = iris.data[:, :2] # Selecting only the first two features for visualization
y = (iris.target != 0).astype(int) # Convert to binary classification: class 0 vs. the rest
# Splitting the dataset into training and testing sets (fixed seed)
X_train, X_test, y_train, y_test =train_test_split(X, y, test_size=0.20,random_state=32)
# K-Nearest Neighbors (KNN) classifier with 3 neighbours
knn_classifier = KNeighborsClassifier(n_neighbors=3)
knn_classifier.fit(X_train, y_train)
y_pred_knn = knn_classifier.predict(X_test)
MACHINE LEARNING WITH PYTHON BCA P 311
Page 39
Aditya Rawat
01290302021
# Support Vector Machine (SVM) Classifier
svm_classifier = SVC(kernel='linear')
svm_classifier.fit(X_train, y_train)
y_pred_svm = svm_classifier.predict(X_test)

# Evaluate classifiers. (In the extracted original this comment was split
# mid-line, leaving the bare words "Evaluate classifiers" as invalid code.)
def evaluate_classifier(y_true, y_pred):
    """Return (accuracy, precision, recall, f1, confusion_matrix) for one model."""
    accuracy = accuracy_score(y_true, y_pred)
    precision = precision_score(y_true, y_pred)
    recall = recall_score(y_true, y_pred)
    f1 = f1_score(y_true, y_pred)
    confusion_mat = confusion_matrix(y_true, y_pred)
    return accuracy, precision, recall, f1, confusion_mat

# Evaluate KNN classifier
accuracy_knn, precision_knn, recall_knn, f1_knn, confusion_mat_knn = evaluate_classifier(y_test, y_pred_knn)

# Evaluate SVM classifier
accuracy_svm, precision_svm, recall_svm, f1_svm, confusion_mat_svm = evaluate_classifier(y_test, y_pred_svm)
# Visualize decision boundaries side by side using the mlxtend helper.
fig, axes = plt.subplots(1, 2, figsize=(12, 5))
fig.suptitle('Decision Boundaries of KNN and SVM')
# Decision boundary for KNN (drawn over the full dataset X, y)
plot_decision_regions(X, y, clf=knn_classifier, legend=2, ax=axes[0])
axes[0].set_title('K-Nearest Neighbors (KNN)')
# Decision boundary for SVM
plot_decision_regions(X, y,
clf=svm_classifier, legend=2, ax=axes[1])
MACHINE LEARNING WITH PYTHON BCA P 311
Page 40
Aditya Rawat
01290302021
axes[1].set_title('Support Vector Machine(SVM)')
plt.show()
# Print performance metrics for both models, side by side for comparison.
print('\nPerformance Metrics for K-Nearest Neighbors (KNN):')
print(f'Accuracy: {accuracy_knn:.4f}')
print(f'Precision: {precision_knn:.4f}')
print(f'Recall: {recall_knn:.4f}')
print(f'F1-score: {f1_knn:.4f}')
print(f'Confusion Matrix:\n{confusion_mat_knn}')
print('\nPerformance Metrics for Support Vector Machine (SVM):')
print(f'Accuracy: {accuracy_svm:.4f}')
print(f'Precision: {precision_svm:.4f}')
print(f'Recall: {recall_svm:.4f}')
print(f'F1-score: {f1_svm:.4f}')
print(f'Confusion Matrix:\n{confusion_mat_svm}')
MACHINE LEARNING WITH PYTHON BCA P 311
Page 41
Aditya Rawat
01290302021
OUTPUT:
MACHINE LEARNING WITH PYTHON BCA P 311
Page 42
Aditya Rawat
01290302021
MACHINE LEARNING WITH PYTHON BCA P 311
Page 43
Aditya Rawat
01290302021
B) Given a dataset of customer churn, implement a program that compares the performance of three
different supervised learning algorithms (e.g., Logistic Regression, Random Forest, and Support
Vector Machine) for binary classification. Split the dataset into training and testing sets, train each
algorithm on the training set, and evaluate their performance using metrics like accuracy, precision,
recall, and F1-score. Present the results in a clear and informative way, such as through a bar chart
or a table.
CODE:
# Importing necessary libraries
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score,precision_score, recall_score, f1_score
import matplotlib.pyplot as plt
# Load your dataset
dataset_path = 'telecom_churn.csv'
data = pd.read_csv(dataset_path)
data.head()
# Assuming your dataset has a 'Churn' column indicating binary labels (1 for churn, 0 for nonchurn)
# NOTE(review): this also assumes every remaining column is numeric -- the
# sklearn estimators below fail on raw string/categorical columns; verify the CSV.
X = data.drop('Churn', axis=1)
y = data['Churn']
X
y
# Split the dataset into training and testing sets (fixed seed)
X_train, X_test, y_train, y_test =train_test_split(X, y, test_size=0.20,random_state=32)
# Define the three classifiers to compare, keyed by display name
classifiers = {'Logistic Regression': LogisticRegression(), 'Random Forest':
RandomForestClassifier(),'Support Vector Machine': SVC()}
MACHINE LEARNING WITH PYTHON BCA P 311
Page 44
Aditya Rawat
01290302021
# Train and evaluate each classifier, collecting one row of metrics per model.
results = {'Classifier': [], 'Accuracy': [], 'Precision': [], 'Recall': [], 'F1-Score': []}
for clf_name, clf in classifiers.items():
    # Train the classifier.
    clf.fit(X_train, y_train)
    # Predict on the test set. (In the extracted original this comment was
    # split mid-line, leaving "Predict on the test set" as bare invalid code.)
    y_pred = clf.predict(X_test)
    # Evaluate performance.
    accuracy = accuracy_score(y_test, y_pred)
    precision = precision_score(y_test, y_pred)
    recall = recall_score(y_test, y_pred)
    f1 = f1_score(y_test, y_pred)
    # Store results.
    results['Classifier'].append(clf_name)
    results['Accuracy'].append(accuracy)
    results['Precision'].append(precision)
    results['Recall'].append(recall)
    results['F1-Score'].append(f1)

# Convert results to a DataFrame for easy visualization.
results_df = pd.DataFrame(results)
# Plot the results using a bar chart.
# NOTE(review): all four metric series are drawn at the same x positions, so
# later bars overpaint earlier ones and only the last metric drawn is fully
# visible; a grouped (offset) bar chart would show all four. Kept as-is to
# match the report's output.
plt.figure(figsize=(7, 5))
for metric in ['Accuracy', 'Precision','Recall', 'F1-Score']:
    plt.bar(results_df['Classifier'],results_df[metric], label=metric)
plt.title('Performance Comparison of Classifiers')
plt.xlabel('Classifier')
MACHINE LEARNING WITH PYTHON BCA P 311
Page 45
Aditya Rawat
01290302021
plt.ylabel('Score')
plt.legend()
plt.show()
# Display the results in tabular form.
print("Results:")
print(results_df)
MACHINE LEARNING WITH PYTHON BCA P 311
Page 46
Aditya Rawat
01290302021
OUTPUT:
MACHINE LEARNING WITH PYTHON BCA P 311
Page 47
Aditya Rawat
01290302021
MACHINE LEARNING WITH PYTHON BCA P 311
Page 48
Aditya Rawat
01290302021
MACHINE LEARNING WITH PYTHON BCA P 311
Page 49
Aditya Rawat
01290302021
PROGRAM 12
OBJECTIVE: Write a Python program that loads a dataset and performs an empirical comparison
of three different clustering algorithms, such as K-Means, Hierarchical Agglomerative Clustering,
and DBSCAN. Evaluate and compare their performance in terms of cluster quality metrics like
Silhouette Score or Inertia, and visualize the results.
CODE:
# Importing libraries
import pandas as pd
import matplotlib.pyplot as plt
from sklearn import datasets
from sklearn.cluster import KMeans,AgglomerativeClustering, DBSCAN
from sklearn.metrics import silhouette_score
from sklearn.preprocessing import StandardScaler
from sklearn.decomposition import PCA
from sklearn.pipeline import make_pipeline
# Load the dataset
iris = datasets.load_iris()
X = iris.data
y = iris.target
X
# Standardize the features (zero mean, unit variance) so the distance-based
# clustering algorithms weigh all four measurements equally.
scaler = StandardScaler()
X_std = scaler.fit_transform(X)
# Apply PCA for visualization purposes (2D plot)
pca = PCA(n_components=2)
X_pca = pca.fit_transform(X_std)
# Define clustering algorithms
kmeans = KMeans(n_clusters=3, random_state=42)
MACHINE LEARNING WITH PYTHON BCA P 311
Page 50
Aditya Rawat
01290302021
hierarchical = AgglomerativeClustering(n_clusters=3)
# DBSCAN picks the number of clusters itself; eps/min_samples set the density.
dbscan = DBSCAN(eps=0.8, min_samples=5)
algorithms = [kmeans, hierarchical, dbscan]
algorithm_names = [ 'K-Means','Hierarchical Agglomerative','DBSCAN']
# Evaluate and visualize each clustering algorithm
for algorithm, algorithm_name in zip(algorithms, algorithm_names):
    # Fit the clustering algorithm to the standardized data.
    if algorithm_name != 'DBSCAN':
        algorithm.fit(X_std)
        labels = algorithm.labels_
    else:
        labels = algorithm.fit_predict(X_std)
    # Evaluate clustering quality using the silhouette score.
    # NOTE(review): silhouette_score raises if DBSCAN produces a single
    # cluster / only noise for the chosen eps; presumably eps=0.8 succeeds
    # on this data as shown in the report's output -- verify if eps changes.
    silhouette_avg = silhouette_score(X_std, labels)
    print(f"{algorithm_name} Silhouette Score: {silhouette_avg:.4f}")
    # Visualize the clustering results in a 2D plot. (In the extracted
    # original the word "plot" from this comment was fused onto the next
    # code line, producing invalid code.)
    plt.figure(figsize=(8, 5))
    plt.scatter(X_pca[:, 0], X_pca[:, 1], c=labels, cmap='viridis', edgecolor='k', s=50)
    plt.title(f'{algorithm_name} Clustering Results (Silhouette Score: {silhouette_avg:.4f})')
    plt.xlabel('Principal Component 1')
    plt.ylabel('Principal Component 2')
    plt.show()
MACHINE LEARNING WITH PYTHON BCA P 311
Page 51
Aditya Rawat
01290302021
OUTPUT:
MACHINE LEARNING WITH PYTHON BCA P 311
Page 52
Aditya Rawat
01290302021
MACHINE LEARNING WITH PYTHON BCA P 311
Page 53
Aditya Rawat
01290302021